This document presents the subset of the figures used for paper about monitoring.
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scatterpie)
library(rgdal)
library(multcompView)
library(car)
library(ggpmisc)
library(chisq.posthoc.test)
library(vcd)
Sampling data correspond to the data collected with kobo and previously cleaned with the script 1_preprocesamiento_datos_kobo.Rmd.
# load data
muestreo_tidy<-read.delim("../data/kobo/muestreo_dic2020_tidy.txt", header = TRUE)
parcelas_tidy<-read.delim("../data/kobo/parcelas_dic2020_tidy.txt", header = TRUE)
# pivot long parcelas data to have health data as a single variable
parcelas_long<-pivot_longer(parcelas_tidy,
cols = healthy:worm,
names_to = "tree_health_simplified",
values_to = "n_trees")
Data analyzed here correspond only to the trees that were approved during the validation by manually reviewing the photographs in kobotoolbox. Total of 1778 trees sampled, 1765 were approved in the validation.
muestreo_tidy<- filter(muestreo_tidy, X_validation_status=="validation_status_approved")
Color palettes:
# Make a nice color pallete and legend order for all plots
my_cols=c("darkgreen",
"darkred",
"orangered1",
"cadetblue",
"tan",
"beige",
# "burlywood4",
"coral",
"aquamarine3",
"gray70",
"black")
desired_order=c("healthy",
"ozone",
"ozone_and_other",
"others_combined",
"drougth",
"fungi",
# "insect",
"worm",
"acid_rain",
"other",
"dead")
desired_names=c("healthy",
"ozone",
"ozone and other",
"others combined",
"drougth",
"fungi",
# "insect",
"worm",
"acid rain",
"other",
"dead")
# For ozone damage percentage
my_cols2<-c("darkgreen", "gold2", "chocolate1", "orangered", "red4", "darkorchid4")
desired_order_percentage<-c("0%","less than 10%", "10 to 40%", "40 to 50%", "50 to 70%", "more than 70%")
Multiplot fun:
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Configure google api for maps:
# code adapted from https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-g0ogle-map.html
## configure google api
# You first need to register your api key in https://cloud.google.com/maps-platform/#get-started and follow instructions. The geocoding API is a free service, but you nevertheless need to associate a credit card with the account. Please note that the Google Maps API is not a free service. There is a free allowance of 40,000 calls to the geocoding API per month, and beyond that calls are $0.005 each.
# after you obtain your api, save it in /scripts/api_key.api (not shown in this repo por obvious reasons).
# if you get the following error when running get_map():
#"Error in aperm.default(map, c(2, 1, 3)) :
# invalid first argument, must be an array "
# check this troubleshooting: https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-google-map.html
## load and register api
api <- readLines("api_key.api")
register_google(key = api)
Map and monitoring figures presented in the paper:
# get cdmx shape
CDMX<-readOGR(dsn="../data/spatial", layer="CDMX")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "CDMX"
## with 1 features
## It has 8 fields
CDMX<-fortify(CDMX)
# get PNDL shape
PNDL<-readOGR(dsn="../data/spatial", layer="Desierto_Leones_Geo_ITRF08")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum International_Terrestrial_Reference_Frame_2008 in
## Proj4 definition: +proj=longlat +ellps=GRS80 +no_defs
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "Desierto_Leones_Geo_ITRF08"
## with 1 features
## It has 14 fields
PNDL<-fortify(PNDL)
# get background map
sat_map = get_map(location = c(lon = -99.133549, lat = 19.3), zoom = 10, maptype = 'terrain-background', source = "google")
## plot
p_a<-ggmap(sat_map) +
geom_polygon(data = CDMX,
aes(x = long, y = lat, group = group),
color="black", fill=NA, size=1.5) +
geom_polygon(data = PNDL,
aes(x = long, y = lat, group = group),
color="red", fill=NA, size=1.5) +
geom_point(aes(x=-98.95, y=19.6),
shape=0, stroke=2, size=5, color="black") +
geom_point(aes(x=-98.95, y=19.55),
shape=0, stroke=2, size=5, color="red") +
geom_text(aes(label="CDMX", x=-98.87, y=19.6),
color="Black", fontface="bold", size=5) +
geom_text(aes(label="PNDL", x=-98.87, y=19.55),
color="Black", fontface="bold", size=5) +
theme(text = element_text(size = 20))+
ggtitle("a)")
# get background map
sat_map = get_map(location = c(lon = -99.30, lat = 19.31), zoom = 13, maptype = 'satellite', source = "google")
## add towns names
towns<-data.frame(nombre=c("San Bartolo Ameyalco",
"Santa Rosa Xochiac",
"San Mateo Tlaltenango"),
long=c(-99.270, -99.29, -99.276),
lat=c(19.333, 19.325, 19.346))
## plot
p_b<-ggmap(sat_map) +
geom_polygon(data = PNDL,
aes(x = long, y = lat, group = group),
color="red", fill=NA, size=1.5) +
geom_point(data=towns, aes(x=long, y=lat), colour="red", size=1.5) +
geom_text(data=towns, aes(label=nombre, x=long, y=lat),
color="white", fontface="bold",
size=5, nudge_y=0.003) +
# add Cruz de Coloxtitla (CX), and Convento (Cn) landmarks
geom_text(aes(label="X", x=-99.3014, y=19.286068),
color="white", fontface="bold", size=4) +
geom_text(aes(label="C", x=-99.31, y=19.3133),
color="white", fontface="bold", size=4) +
theme(text = element_text(size = 20))+
ggtitle("b)")
## plot map
# get map
sat_map = get_map(location = c(lon = -99.3060, lat = 19.2909), zoom = 14, maptype = 'satellite', source = "google")
# plot sampled plots
p_c <- ggmap(sat_map)
p_c <- p_c + geom_point(data=parcelas_tidy,
aes(x=X_coordinates_longitude,
y=X_coordinates_latitude),
color="red") +
geom_text(data=parcelas_tidy,
aes(x=X_coordinates_longitude,
y=X_coordinates_latitude,
label=plot),
color="white",
check_overlap = TRUE,
hjust = 0, vjust=1, nudge_x = 0.0005,
size= 5) +
theme(text = element_text(size = 20))+
ggtitle("c)")
The following figure shows the total number of trees sampled in each 10x10 m plot, and how many of these are under some category of damage:
p_d <- ggplot(parcelas_long, aes(x=plot, y=n_trees, fill=tree_health_simplified)) +
geom_bar(stat="identity") +
scale_fill_manual(values= my_cols, breaks = desired_order,
labels= desired_names,
name= "Health status")
p_d <- p_d + theme_bw() +
labs(x="Plots", y= "Number of trees") +
theme(text = element_text(size = 20)) +
ggtitle("d)")
multiplot(p_a, p_c, p_b, p_d, cols=2)
el estado de salud del aárbol es independiente de su condición de reforestación H:árbolres dañados por ozono dependen de si fueron reforestados
# Select tree reforested and covered data
cont_tab<- select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
filter(tree_health_simplified == "healthy"| tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other")
table(cont_tab)
## , , tree_exposition = cover
##
## reforested
## tree_health_simplified no yes
## healthy 247 75
## ozone 42 20
## ozone_and_other 278 41
##
## , , tree_exposition = exposed
##
## reforested
## tree_health_simplified no yes
## healthy 136 31
## ozone 52 11
## ozone_and_other 157 23
tab_reforested<- as.table(array(c(sum(with(cont_tab, tree_health_simplified == "healthy" & reforested == "yes")),
sum(with(cont_tab, tree_health_simplified == "ozone" & reforested == "yes")),
sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & reforested == "yes")),
sum(with(cont_tab, tree_health_simplified == "healthy" & reforested == "no")),
sum(with(cont_tab, tree_health_simplified == "ozone" & reforested == "no")),
sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & reforested == "no"))),
dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("yes","no"))))
# Pass data matrix to chisq.posthoc.test function
names(attributes(tab_reforested)$dimnames) <- c("healthy status", "reforested")
# Barplot
p_3a<-ggplot(cont_tab, aes(reforested, ..count..)) +
geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
scale_fill_manual(name ="Health status", values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"),
labels= c("healthy", "ozone","ozone and other"))+
theme_bw()+ ggtitle("a)")+ theme(legend.title.align = 0.5)+ theme(text = element_text(size = 20))+
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
labs(y="Number of trees", x= "Reforested")
# Mosaic Plot with vcd library
p_3b<-mosaic(tab_reforested, shade=TRUE, legend=TRUE,
labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))
# Pruebas de otros mosaicos
## OPCION 1
library("graphics")
mosaicplot(tab_reforested, shade = TRUE, las=2,
main = "housetasks")
## OPCION 2
# install.packages("vcd")
library("vcd")
# plot just a subset of the table
assoc(head(tab_reforested, 5), shade = TRUE, las=2)
# Chi2
chisq <- chisq.test(tab_reforested)
chisq
##
## Pearson's Chi-squared test
##
## data: tab_reforested
## X-squared = 17.399, df = 2, p-value = 0.0001666
# Observed counts
chisq$observed
## reforested
## healthy status yes no
## healthy 106 383
## ozone 31 94
## ozone and other 64 435
# Expected counts
round(chisq$expected,2)
## reforested
## healthy status yes no
## healthy 88.31 400.69
## ozone 22.57 102.43
## ozone and other 90.12 408.88
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
## reforested
## healthy status yes no
## healthy 1.882 -0.884
## ozone 1.773 -0.833
## ozone and other -2.751 1.292
# Visuaalize Pearson residuals
library(corrplot)
corrplot(chisq$residuals, is.cor = FALSE)
# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
## reforested
## healthy status yes no
## healthy 20.366 4.489
## ozone 18.075 3.984
## ozone and other 43.499 9.587
# Visualize the contribution
corrplot(contrib, is.cor = FALSE)
tab_covered<- as.table(array(c(sum(with(cont_tab, tree_health_simplified == "healthy" & tree_exposition == "cover")),
sum(with(cont_tab, tree_health_simplified == "ozone" & tree_exposition == "cover")),
sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & tree_exposition == "cover")),
sum(with(cont_tab, tree_health_simplified == "healthy" & tree_exposition == "exposed")),
sum(with(cont_tab, tree_health_simplified == "ozone" & tree_exposition == "exposed")),
sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & tree_exposition == "exposed"))),
dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("yes","no"))))
# Pass data matrix to chisq.posthoc.test function
names(attributes(tab_covered)$dimnames) <- c("healthy status", "covered")
#Barplot
p_3c<-ggplot(cont_tab, aes(tree_exposition, ..count..)) +
geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
scale_fill_manual(name ="Health status",
values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"),
labels= c("healthy", "ozone","ozone and other"))+
theme_bw()+ ggtitle("c)")+theme(legend.title.align = 0.5)+theme(text = element_text(size = 20))+
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
labs(y="Number of trees", x= "Tree exposition")
# Mosaic Plot with vcd library
p_3d <- mosaic(tab_covered, shade=TRUE, legend=TRUE, labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))
# Pruebas de otros mosaicos
## OPCION 1
mosaicplot(tab_covered, shade = TRUE, las=2,
main = "housetasks")
## OPCION 2
# plot just a subset of the table
assoc(head(tab_covered, 5), shade = TRUE, las=2)
# Chi2
chisq <- chisq.test(tab_covered)
chisq
##
## Pearson's Chi-squared test
##
## data: tab_covered
## X-squared = 11.524, df = 2, p-value = 0.003145
# Observed counts
chisq$observed
## covered
## healthy status yes no
## healthy 322 167
## ozone 62 63
## ozone and other 319 180
# Expected counts
round(chisq$expected,2)
## covered
## healthy status yes no
## healthy 308.87 180.13
## ozone 78.95 46.05
## ozone and other 315.18 183.82
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
## covered
## healthy status yes no
## healthy 0.747 -0.979
## ozone -1.908 2.498
## ozone and other 0.215 -0.282
# Visuaalize Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)
# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
## covered
## healthy status yes no
## healthy 4.847 8.311
## ozone 31.589 54.163
## ozone and other 0.401 0.688
# Visualize the contribution
corrplot(contrib, is.cor = FALSE)
multiplot(p_3a, p_3c, p_3b, p_3d, cols=2)
## reforested yes no
## healthy status
## healthy 106 383
## ozone 31 94
## ozone and other 64 435
## covered yes no
## healthy status
## healthy 322 167
## ozone 62 63
## ozone and other 319 180
p <- filter(muestreo_tidy, tree_heigth<15, tree_nodes>0) %>%
ggplot(.) +
scale_fill_manual(values= my_cols, breaks = desired_order,
labels= desired_names,
name= "Health status") +
theme_bw()
p4_a <- p + geom_histogram(aes(x=tree_nodes,
fill=tree_health_simplified)) +
labs(x="Tree age (years)", y= "Number of trees") +
theme(text = element_text(size = 20)) +
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
ggtitle("a)")
p4_a
## base data
# Definir plantas sanas y dañadas por otra cosa que no fuera ozono
# cond_PO<- se refiere a condition Percentage damage by Ozone
cond_PO<-as_data_frame(muestreo_tidy)
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Asignar 0% de daño por ozono a los árboles healthy
cond_PO$ozone_damage_percentage = ifelse(cond_PO$tree_health == "healthy", "0%", cond_PO$ozone_damage_percentage)
# Filtrar por porcentaje de daño
condition_PO<-cond_PO%>%
filter(ozone_damage_percentage == "0%" | ozone_damage_percentage == "less than 10%" | ozone_damage_percentage == "10 to 40%" | ozone_damage_percentage == "40 to 50%"| ozone_damage_percentage == "50 to 70%" | ozone_damage_percentage == "more than 70%")
condition_PO$ozone_damage_percentage <- as.factor(condition_PO$ozone_damage_percentage)
# Plot
p_od<- condition_PO %>% filter(!is.na(ozone_damage_percentage)) %>%
ggplot() +
scale_fill_manual(values= my_cols2,
breaks = desired_order_percentage,
labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
"50 to 70%", "more 70%"),
name= "Ozone damage\n per tree") +
theme_bw() + theme(text = element_text(size = 20))
p4_b <- p_od +
geom_bar(aes(x=tree_nodes,
fill=ozone_damage_percentage)) +
labs(x="Tree age (years)", y= "Number of trees") +
theme(legend.title.align = 0.5)+
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
ggtitle("b)")
p4_b
## Warning: Removed 147 rows containing non-finite values (stat_count).
# Filtrar por categorÃa de daño
condition_HOO<-muestreo_tidy%>%
filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other" )
condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)
# Data distribution
# Los datos tienen a graficar es el número de nodos para cada categoria de salud.
# Los datos son continuos discretos, por lo tanto el analisis a seguir para buscar diferencias entre los grupos son:
shapiro.test(condition_HOO$tree_nodes) #NO HAY NORMALIDAD
##
## Shapiro-Wilk normality test
##
## data: condition_HOO$tree_nodes
## W = 0.94294, p-value < 2.2e-16
# Procedo a hacer una prueba no paramétrica (kruskal)
# Debe tener Homocedasticidad: la varianza debe de ser constante entre todos los grupos.
# If the p-value is less than our chosen significance level, we can reject the null hypothesis and conclude that we have enough evidence to state that the variance among the groups is not equal.
leveneTest(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO) #Opera con medias
# As the p value obtained from the Shapiro-Wilk test and Levene’s test is significant (p < 0.05), we conclude that the data is not normally distributed and does not have equal variance. Kruskal-Wallis test is more appropriate for analyzing differences.
kruskal.test(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO) #opera con mediana
##
## Kruskal-Wallis rank sum test
##
## data: sqrt(tree_nodes) by tree_health_simplified
## Kruskal-Wallis chi-squared = 265.57, df = 2, p-value < 2.2e-16
#As the p value obtained from the Kruskal-Wallis test test is significant , we conclude that there are significant differences.
# For the Kruskal-Wallis test, epsilon-squared is a method of choice for effect size measurement. The epsilon-squared is 0.74 and suggests a very strong effect of plant varieties on yield
# calculate effect size. Hay efecto relativamente fuerte
library(rcompanion)
epsilonSquared(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified)
## epsilon.squared
## 0.239
# https://peterstatistics.com/CrashCourse/3-TwoVarUnpair/NomOrd/NomOrd3c.html
# To know which plant varieties are significantly different from each other, we will perform the Dunn’s test as post-hoc test for significant Kruskal-Wallis test. As there are multiple comparisons, we will correct the p values using Benjamini-Hochberg FDR method for multiple hypothesis testing at a 5% cut-off. The other tests that can be used for post-hoc test includes Conover and Nemenyi tests. Dunn’s test is more appropriate post-hoc than the Mann-Whitney U test for significant Kruskal-Wallis test as it retains the rank sums of the Kruskal-Wallis.
#poshoc para saber que grupos difieren
pairwise.wilcox.test(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified, p.adjust.method = "holm" )
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: sqrt(condition_HOO$tree_nodes) and condition_HOO$tree_health_simplified
##
## healthy ozone
## ozone <2e-16 -
## ozone_and_other <2e-16 0.012
##
## P value adjustment method: holm
# Perorm pairwise comparisons
library(ggpubr)
my_comparisons <- list( c("healthy", "ozone"), c("healthy", "ozone_and_other"), c("ozone", "ozone_and_other") )
p4_c<-condition_HOO%>%
ggplot(aes(y= tree_nodes, x= tree_health_simplified))+
geom_boxplot(color="grey", notch = F)+
scale_color_manual(values= my_cols, labels= desired_names,
name= "Health status")+
geom_point(position="jitter",aes(color = tree_health_simplified), alpha=0.5, size= 2.5)+
xlab("")+ ylab("Tree age (years)")+
theme_bw()+
ggtitle("c)")+
theme(text = element_text(size = 20), axis.text.x=element_blank())+
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
stat_compare_means(comparisons = my_comparisons, label = "p.signif")
p4_c
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).
# Perorm pairwise comparisons
# Run Shapiro-Wilk test
shapiro.test(condition_PO$tree_nodes) #NO HAY NORMALIDAD
##
## Shapiro-Wilk normality test
##
## data: condition_PO$tree_nodes
## W = 0.94294, p-value < 2.2e-16
# Procedo a hacer un kruskal
# Debe tener homogeneidad. Si es mayor a 0.05 No hay evidencias en contra de la homogeneidad de varianzas.
leveneTest(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_PO, center = "median")
kruskal.test(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_PO)
##
## Kruskal-Wallis rank sum test
##
## data: sqrt(tree_nodes) by ozone_damage_percentage
## Kruskal-Wallis chi-squared = 283.95, df = 5, p-value < 2.2e-16
epsilonSquared(x = sqrt(condition_PO$tree_nodes), g = condition_PO$ozone_damage_percentage)
## epsilon.squared
## 0.255
#poshoc que grupos difieren
#order levels
condition_PO$ozone_damage_percentage<- ordered(condition_PO$ozone_damage_percentage, levels=c("0%","less than 10%", "10 to 40%", "40 to 50%", "50 to 70%","more than 70%"))
#Prueba
pairwise.wilcox.test(x = sqrt(condition_PO$tree_nodes), g = condition_PO$ozone_damage_percentage, p.adjust.method = "bonferroni" )
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: sqrt(condition_PO$tree_nodes) and condition_PO$ozone_damage_percentage
##
## 0% less than 10% 10 to 40% 40 to 50% 50 to 70%
## less than 10% < 2e-16 - - - -
## 10 to 40% < 2e-16 0.6498 - - -
## 40 to 50% < 2e-16 0.2086 1.0000 - -
## 50 to 70% < 2e-16 2.9e-06 0.0026 0.5273 -
## more than 70% 1.7e-05 1.0000 1.0000 1.0000 0.2060
##
## P value adjustment method: bonferroni
my_comparisons <- list(c("0%", "less than 10%"), c("0%", "10 to 40%"), c("0%", "40 to 50%"),
c("0%", "50 to 70%"), c("0%", "more than 70%"),
c("less than 10%", "50 to 70%"),
c("10 to 40%", "50 to 70%"))
# Plot
p4_d<-condition_PO%>% filter(!is.na(ozone_damage_percentage)) %>%
ggplot(aes(y= tree_nodes, x= ozone_damage_percentage))+
geom_boxplot(color="grey", notch = F)+
scale_color_manual(values= my_cols2,labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
"50 to 70%", "more 70%"))+
geom_point(position="jitter",aes(color = ozone_damage_percentage), alpha=0.5, size= 2.5)+
xlab("")+ ylab("Tree age (years)")+
labs(color = "Ozone damage\n per tree")+
theme_bw()+
ggtitle("d)")+
theme(legend.title.align = 0.5)+
theme(text = element_text(size = 20), axis.text.x=element_blank())+
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, label="p.signif")
p4_d
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).
multiplot(p4_a, p4_c, p4_b, p4_d, cols=2)
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).
## Warning: Removed 147 rows containing non-finite values (stat_count).
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).
# Filtrar por categorÃa de daño
condition_HOO<-muestreo_tidy%>%
filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other" )
condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)
# Modelo 3 - Edad, salud y estructura espacial afectan crecimiento
glm3<-glm(log10(tree_heigth) ~ tree_nodes*tree_health_simplified + tree_exposition + reforested, data = condition_HOO)
# Ho= hay normalidad, si pvalues es mayor a 0.05 se acepta la Ho por lo tanto pvalue mayor a 0.05 hay normalidad
shapiro.test(glm3$residuals) # Normalidad ( Datos normales si es mayor a 0.05)
##
## Shapiro-Wilk normality test
##
## data: glm3$residuals
## W = 0.99778, p-value = 0.2238
cor.test(abs(glm3$residuals), glm3$fitted.values) #Homocedasticidad: Valor no significativa sig que si hay homocedasticidad
##
## Pearson's product-moment correlation
##
## data: abs(glm3$residuals) and glm3$fitted.values
## t = 1.4722, df = 964, p-value = 0.1413
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0157601 0.1101083
## sample estimates:
## cor
## 0.04736209
par(mfrow =c(2,2))
plot(glm3)
summary(glm3)
##
## Call:
## glm(formula = log10(tree_heigth) ~ tree_nodes * tree_health_simplified +
## tree_exposition + reforested, data = condition_HOO)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.94431 -0.19691 0.01762 0.18800 0.84646
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -0.694392 0.033429 -20.772
## tree_nodes 0.108988 0.004806 22.677
## tree_health_simplifiedozone 0.218104 0.096575 2.258
## tree_health_simplifiedozone_and_other 0.426346 0.053116 8.027
## tree_expositionexposed 0.069066 0.020785 3.323
## reforestedyes 0.032913 0.024310 1.354
## tree_nodes:tree_health_simplifiedozone -0.013930 0.010673 -1.305
## tree_nodes:tree_health_simplifiedozone_and_other -0.037654 0.006213 -6.060
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## tree_nodes < 2e-16 ***
## tree_health_simplifiedozone 0.024145 *
## tree_health_simplifiedozone_and_other 2.92e-15 ***
## tree_expositionexposed 0.000925 ***
## reforestedyes 0.176097
## tree_nodes:tree_health_simplifiedozone 0.192146
## tree_nodes:tree_health_simplifiedozone_and_other 1.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.08797878)
##
## Null deviance: 224.146 on 965 degrees of freedom
## Residual deviance: 84.284 on 958 degrees of freedom
## (147 observations deleted due to missingness)
## AIC: 403.34
##
## Number of Fisher Scoring iterations: 2
# Graficar modelo 3
Tree_height_plot<-ggplot(condition_HOO, aes(x = tree_nodes, y = log(tree_heigth))) + geom_point(aes(colour=tree_health_simplified), alpha=0.5, size= 2.5) + geom_smooth(method="glm",aes(color= tree_health_simplified), fullrange =T)+
labs( y = "log(Tree higth)", x = "Tree age (years)", color = "Health status")+
scale_color_manual(values= my_cols, labels= desired_names,
name= "Health status")+
theme(text = element_text(size = 20), axis.text.x=element_blank())+
theme(plot.title = element_text(lineheight=1.1, face="bold"))+
theme_bw()
Tree_height_plot
## Warning: Removed 147 rows containing non-finite values (stat_smooth).
## Warning: Removed 147 rows containing missing values (geom_point).
# plot pies in map
p_satmap <- ggmap(sat_map)
p_satmap +geom_scatterpie(data=parcelas_tidy,
aes(x=X_coordinates_longitude,
y=X_coordinates_latitude,
group=plot),
pie_scale = 1.5,
cols=desired_order,
color=NA,
alpha=1) +
scale_fill_manual(values= my_cols, breaks = desired_order,
labels= desired_names,
name= "Health status") +
theme(text = element_text(size = 20))
# Create new variable with porcentage of ozone damage
parcelas_tidy<-parcelas_tidy %>% rowwise() %>%
mutate(.,
total=sum(healthy,ozone,ozone_and_other,
drougth, acid_rain, other,
others_combined, dead, fungi,
# insect,
worm)) %>%
mutate(perc.ozone= sum(ozone, ozone_and_other)/total)
#plot
p <- ggplot(parcelas_tidy) +
geom_point(aes(x=X_coordinates_altitude,
y=perc.ozone))
p<- ggplot(parcelas_tidy, aes(X_coordinates_altitude, perc.ozone ))+
geom_point(color= "grey50", size = 3, alpha = 0.6)
p +
stat_smooth(color = "skyblue", formula = y ~ x,fill = "skyblue", method = "lm") +
stat_poly_eq(
aes(label = paste(..eq.label.., ..adj.rr.label.., sep = '~~~~')),
formula = y ~ x, parse = TRUE,
size = 10, # Tamaño de fuente de la fórmula
label.x = 0.1, #location, la proporción entre 0-1
label.y = 0.95)+
labs(x="Plot altitude", y= "Percentage of ozone damaged trees")+
theme_bw() +
theme(plot.title = element_text(lineheight=1.1, face="bold")) +
theme(text = element_text(size = 20))
# No se incluye a M (37) y PP (6), ya que eran colectados por diferentes miembros del team
trees<- c(135,133,130,174,150,139,165,166,133,137,125,105)
rangers<- c("A","B","C","D","E","F","G","H", "I", "J", "K", "L")
mean(trees)
## [1] 141
sd(trees)
## [1] 19.60519
# Create new variable with porcentage of ozone damage
parcelas_tidy<-parcelas_tidy %>% rowwise() %>%
mutate(.,
total=sum(healthy,ozone,ozone_and_other,
drougth, acid_rain, other,
others_combined, dead, fungi,
# insect,
worm)) %>%
mutate(perc.other= sum(healthy,
drougth, acid_rain, other,
others_combined, dead, fungi,worm)/total)
status_HS<- c("healthy","ozone","ozone_and_other",
"drougth", "acid_rain", "other",
"others_combined", "dead", "fungi",
"worm")
count_HS<-c(sum(parcelas_tidy$healthy), sum(parcelas_tidy$ozone),
sum(parcelas_tidy$ozone_and_other),sum(parcelas_tidy$drougth),
sum(parcelas_tidy$acid_rain),sum(parcelas_tidy$other),
sum(parcelas_tidy$others_combined), sum(parcelas_tidy$dead),
sum(parcelas_tidy$fungi), sum(parcelas_tidy$worm))
percentageH<-data.frame(status_HS, count_HS)
(sum(c(parcelas_tidy$ozone, parcelas_tidy$ozone_and_other))*100)/sum(count_HS)
## [1] 35.43307
(sum(parcelas_tidy$healthy)*100)/sum(count_HS)
## [1] 27.67154
(sum(c(parcelas_tidy$drougth, parcelas_tidy$acid_rain , parcelas_tidy$other, parcelas_tidy$others_combined,parcelas_tidy$dead, parcelas_tidy$fungi, parcelas_tidy$worm))*100)/sum(count_HS)
## [1] 36.89539
#Percentage damage plots
PDP<-data.frame(parcelas_tidy$plot, parcelas_tidy$perc.ozone)
PDP<-PDP[order(PDP$parcelas_tidy.plot),]
range(PDP$parcelas_tidy.perc.ozone)
## [1] 0.0000000 0.8181818
head(PDP[order(PDP$parcelas_tidy.perc.ozone),])